##### load packages (the lines below are the package start-up messages)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loaded gbm 2.1.5
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Read the call measurements and build a 0/1 species response
C <- read.csv("Calls.csv")
# Species_no starts at 0 for every row and is set to 1 where Species is "Grey"
C[["Species_no"]] <- 0
igrey <- which(C[["Species"]] == "Grey")
C[igrey, "Species_no"] <- 1
# Drop the Name and Species columns; every other column (including Species_no) is kept
rm <- c("Name", "Species")
keep <- names(C)[!(names(C) %in% rm)]
C <- C[, keep]
# Write the prepared table to disk and read it straight back
save(C, file = "C.Rdata")
load("C.Rdata")
# Build the model formula: Species_no is the response and every other
# column of C is a predictor, joined with "+"
label_col <- which(names(C) == "Species_no")
y_col <- label_col
names <- colnames(C)[-label_col]
model <- as.formula(paste0(colnames(C)[y_col],
                           "~",
                           paste(names, collapse = "+")))
# Show the resulting formula
model
## Species_no ~ No_of_harmonics + Agg_Entropy + Avg_Entropy + Avg_Power +
## BW_90. + Center_Freq + Center_Time + Center_Time_Rel + Delta_Freq +
## Delta_Time + Dur_90. + Energy + Freq_5. + Freq_5.._Rel +
## Freq_95. + Freq_95._Rel + IQR_BW + IQR_Dur + Inband_Power +
## Max_Entropy + Max_Freq + Max_Power + Max_Time + Min_Entropy +
## Peak_Freq + PFC_Max_Freq + PFC_Max_Slope + PFC_Min_Freq +
## PFC_Num_Inf_Pts + Peak_Power + Peak_Time + Peak_Time_Relative +
## Sample_Length + Time_5. + Time_5._Rel + Time_95. + Time_95._Rel
# Split C into Train and Test: createDataPartition() is asked for p = 0.8
# of the rows, and the rows it does not return form the Test set.
# NOTE(review): no set.seed() call is visible before this point, so the
# split may differ between runs — confirm whether a seed is set elsewhere.
DP <- createDataPartition(C$Species_no, p = 0.8, list = FALSE)
Test <- C[-DP, ]
Train <- C[DP, ]
# Start the timer for the model fit
ptm <- proc.time()
# Boosting settings
n.trees <- 15000
shrinkage <- 0.001 # final version should be 0.001
cv.folds <- 10 # final version should be 10
# Fit the formula in `model` on the training rows with a Bernoulli loss;
# verbose = TRUE prints the deviance trace while trees are added
gbmtest <- gbm(
  model,
  data = Train,
  distribution = "bernoulli",
  n.trees = n.trees,
  shrinkage = shrinkage,
  cv.folds = cv.folds,
  interaction.depth = 3,
  n.minobsinnode = 5,
  bag.fraction = 0.50,
  train.fraction = 1,
  keep.data = TRUE,
  verbose = TRUE,
  n.cores = NULL
)
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.3802 nan 0.0010 0.0002
## 2 1.3792 nan 0.0010 0.0003
## 3 1.3784 nan 0.0010 0.0001
## 4 1.3776 nan 0.0010 0.0000
## 5 1.3767 nan 0.0010 0.0003
## 6 1.3762 nan 0.0010 -0.0001
## 7 1.3754 nan 0.0010 0.0002
## 8 1.3746 nan 0.0010 0.0002
## 9 1.3739 nan 0.0010 0.0001
## 10 1.3731 nan 0.0010 -0.0000
## 20 1.3656 nan 0.0010 0.0000
## 40 1.3503 nan 0.0010 0.0002
## 60 1.3350 nan 0.0010 0.0001
## 80 1.3197 nan 0.0010 0.0002
## 100 1.3055 nan 0.0010 0.0002
## 120 1.2909 nan 0.0010 0.0001
## 140 1.2784 nan 0.0010 -0.0002
## 160 1.2645 nan 0.0010 0.0001
## 180 1.2516 nan 0.0010 -0.0000
## 200 1.2385 nan 0.0010 0.0002
## 220 1.2255 nan 0.0010 0.0001
## 240 1.2118 nan 0.0010 0.0000
## 260 1.1992 nan 0.0010 0.0002
## 280 1.1866 nan 0.0010 0.0000
## 300 1.1741 nan 0.0010 0.0002
## 320 1.1617 nan 0.0010 0.0001
## 340 1.1501 nan 0.0010 0.0002
## 360 1.1383 nan 0.0010 -0.0000
## 380 1.1269 nan 0.0010 0.0001
## 400 1.1160 nan 0.0010 0.0000
## 420 1.1056 nan 0.0010 0.0001
## 440 1.0941 nan 0.0010 0.0001
## 460 1.0848 nan 0.0010 -0.0000
## 480 1.0739 nan 0.0010 0.0003
## 500 1.0642 nan 0.0010 -0.0000
## 520 1.0542 nan 0.0010 -0.0001
## 540 1.0441 nan 0.0010 0.0001
## 560 1.0337 nan 0.0010 -0.0000
## 580 1.0245 nan 0.0010 0.0001
## 600 1.0152 nan 0.0010 0.0002
## 620 1.0061 nan 0.0010 0.0000
## 640 0.9962 nan 0.0010 0.0001
## 660 0.9867 nan 0.0010 0.0000
## 680 0.9776 nan 0.0010 0.0001
## 700 0.9688 nan 0.0010 -0.0000
## 720 0.9604 nan 0.0010 -0.0000
## 740 0.9512 nan 0.0010 0.0000
## 760 0.9422 nan 0.0010 0.0001
## 780 0.9341 nan 0.0010 0.0000
## 800 0.9254 nan 0.0010 0.0001
## 820 0.9168 nan 0.0010 -0.0000
## 840 0.9090 nan 0.0010 -0.0000
## 860 0.9013 nan 0.0010 0.0001
## 880 0.8942 nan 0.0010 0.0001
## 900 0.8860 nan 0.0010 0.0000
## 920 0.8782 nan 0.0010 0.0001
## 940 0.8705 nan 0.0010 0.0000
## 960 0.8626 nan 0.0010 -0.0000
## 980 0.8554 nan 0.0010 -0.0000
## 1000 0.8482 nan 0.0010 -0.0001
## 1020 0.8409 nan 0.0010 -0.0002
## 1040 0.8340 nan 0.0010 -0.0000
## 1060 0.8275 nan 0.0010 -0.0000
## 1080 0.8208 nan 0.0010 0.0000
## 1100 0.8137 nan 0.0010 0.0001
## 1120 0.8071 nan 0.0010 0.0000
## 1140 0.8006 nan 0.0010 0.0000
## 1160 0.7941 nan 0.0010 -0.0000
## 1180 0.7874 nan 0.0010 -0.0001
## 1200 0.7811 nan 0.0010 0.0000
## 1220 0.7753 nan 0.0010 -0.0000
## 1240 0.7690 nan 0.0010 0.0000
## 1260 0.7627 nan 0.0010 0.0001
## 1280 0.7568 nan 0.0010 0.0000
## 1300 0.7504 nan 0.0010 -0.0000
## 1320 0.7446 nan 0.0010 -0.0001
## 1340 0.7392 nan 0.0010 0.0000
## 1360 0.7332 nan 0.0010 -0.0001
## 1380 0.7276 nan 0.0010 -0.0000
## 1400 0.7215 nan 0.0010 -0.0002
## 1420 0.7159 nan 0.0010 0.0000
## 1440 0.7105 nan 0.0010 -0.0001
## 1460 0.7050 nan 0.0010 0.0001
## 1480 0.6996 nan 0.0010 -0.0000
## 1500 0.6951 nan 0.0010 -0.0001
## 1520 0.6896 nan 0.0010 0.0000
## 1540 0.6840 nan 0.0010 0.0000
## 1560 0.6787 nan 0.0010 -0.0000
## 1580 0.6736 nan 0.0010 -0.0001
## 1600 0.6687 nan 0.0010 0.0000
## 1620 0.6635 nan 0.0010 -0.0000
## 1640 0.6584 nan 0.0010 -0.0000
## 1660 0.6530 nan 0.0010 -0.0000
## 1680 0.6481 nan 0.0010 -0.0001
## 1700 0.6435 nan 0.0010 0.0000
## 1720 0.6388 nan 0.0010 -0.0001
## 1740 0.6339 nan 0.0010 -0.0001
## 1760 0.6287 nan 0.0010 -0.0001
## 1780 0.6241 nan 0.0010 0.0000
## 1800 0.6194 nan 0.0010 0.0001
## 1820 0.6150 nan 0.0010 0.0000
## 1840 0.6104 nan 0.0010 0.0000
## 1860 0.6059 nan 0.0010 -0.0000
## 1880 0.6012 nan 0.0010 -0.0000
## 1900 0.5966 nan 0.0010 -0.0001
## 1920 0.5919 nan 0.0010 0.0001
## 1940 0.5879 nan 0.0010 -0.0000
## 1960 0.5838 nan 0.0010 -0.0000
## 1980 0.5795 nan 0.0010 -0.0000
## 2000 0.5757 nan 0.0010 0.0001
## 2020 0.5716 nan 0.0010 0.0000
## 2040 0.5677 nan 0.0010 -0.0001
## 2060 0.5636 nan 0.0010 -0.0001
## 2080 0.5596 nan 0.0010 0.0000
## 2100 0.5553 nan 0.0010 0.0000
## 2120 0.5511 nan 0.0010 -0.0000
## 2140 0.5475 nan 0.0010 -0.0000
## 2160 0.5434 nan 0.0010 -0.0000
## 2180 0.5394 nan 0.0010 -0.0000
## 2200 0.5355 nan 0.0010 -0.0001
## 2220 0.5318 nan 0.0010 0.0000
## 2240 0.5283 nan 0.0010 -0.0001
## 2260 0.5246 nan 0.0010 -0.0000
## 2280 0.5207 nan 0.0010 -0.0000
## 2300 0.5170 nan 0.0010 -0.0001
## 2320 0.5136 nan 0.0010 -0.0000
## 2340 0.5096 nan 0.0010 -0.0000
## 2360 0.5062 nan 0.0010 -0.0000
## 2380 0.5027 nan 0.0010 0.0000
## 2400 0.4995 nan 0.0010 -0.0000
## 2420 0.4960 nan 0.0010 -0.0001
## 2440 0.4925 nan 0.0010 -0.0001
## 2460 0.4890 nan 0.0010 -0.0001
## 2480 0.4856 nan 0.0010 0.0000
## 2500 0.4820 nan 0.0010 -0.0000
## 2520 0.4788 nan 0.0010 -0.0001
## 2540 0.4759 nan 0.0010 -0.0000
## 2560 0.4724 nan 0.0010 -0.0000
## 2580 0.4695 nan 0.0010 -0.0000
## 2600 0.4661 nan 0.0010 -0.0001
## 2620 0.4628 nan 0.0010 -0.0000
## 2640 0.4596 nan 0.0010 -0.0000
## 2660 0.4565 nan 0.0010 0.0000
## 2680 0.4532 nan 0.0010 -0.0000
## 2700 0.4497 nan 0.0010 -0.0000
## 2720 0.4468 nan 0.0010 -0.0000
## 2740 0.4439 nan 0.0010 -0.0000
## 2760 0.4408 nan 0.0010 0.0000
## 2780 0.4379 nan 0.0010 -0.0000
## 2800 0.4350 nan 0.0010 -0.0001
## 2820 0.4318 nan 0.0010 0.0000
## 2840 0.4289 nan 0.0010 -0.0001
## 2860 0.4262 nan 0.0010 -0.0000
## 2880 0.4236 nan 0.0010 -0.0001
## 2900 0.4212 nan 0.0010 0.0000
## 2920 0.4184 nan 0.0010 -0.0000
## 2940 0.4158 nan 0.0010 -0.0000
## 2960 0.4130 nan 0.0010 -0.0000
## 2980 0.4106 nan 0.0010 -0.0001
## 3000 0.4081 nan 0.0010 -0.0001
## 3020 0.4055 nan 0.0010 -0.0000
## 3040 0.4029 nan 0.0010 -0.0000
## 3060 0.4002 nan 0.0010 -0.0001
## 3080 0.3974 nan 0.0010 0.0000
## 3100 0.3951 nan 0.0010 -0.0000
## 3120 0.3926 nan 0.0010 -0.0001
## 3140 0.3902 nan 0.0010 -0.0000
## 3160 0.3875 nan 0.0010 -0.0000
## 3180 0.3849 nan 0.0010 -0.0001
## 3200 0.3823 nan 0.0010 -0.0000
## 3220 0.3799 nan 0.0010 -0.0000
## 3240 0.3773 nan 0.0010 -0.0000
## 3260 0.3749 nan 0.0010 0.0000
## 3280 0.3723 nan 0.0010 -0.0000
## 3300 0.3698 nan 0.0010 -0.0000
## 3320 0.3674 nan 0.0010 -0.0000
## 3340 0.3649 nan 0.0010 -0.0000
## 3360 0.3625 nan 0.0010 -0.0000
## 3380 0.3601 nan 0.0010 -0.0000
## 3400 0.3581 nan 0.0010 -0.0001
## 3420 0.3561 nan 0.0010 -0.0001
## 3440 0.3538 nan 0.0010 -0.0000
## 3460 0.3518 nan 0.0010 -0.0001
## 3480 0.3493 nan 0.0010 0.0000
## 3500 0.3470 nan 0.0010 0.0000
## 3520 0.3448 nan 0.0010 -0.0001
## 3540 0.3426 nan 0.0010 -0.0000
## 3560 0.3406 nan 0.0010 0.0000
## 3580 0.3383 nan 0.0010 -0.0000
## 3600 0.3362 nan 0.0010 0.0000
## 3620 0.3342 nan 0.0010 -0.0000
## 3640 0.3323 nan 0.0010 -0.0000
## 3660 0.3302 nan 0.0010 -0.0000
## 3680 0.3283 nan 0.0010 -0.0000
## 3700 0.3261 nan 0.0010 -0.0000
## 3720 0.3239 nan 0.0010 -0.0001
## 3740 0.3218 nan 0.0010 -0.0000
## 3760 0.3198 nan 0.0010 -0.0000
## 3780 0.3176 nan 0.0010 0.0000
## 3800 0.3155 nan 0.0010 -0.0000
## 3820 0.3136 nan 0.0010 -0.0000
## 3840 0.3117 nan 0.0010 -0.0000
## 3860 0.3095 nan 0.0010 -0.0000
## 3880 0.3077 nan 0.0010 -0.0000
## 3900 0.3055 nan 0.0010 -0.0000
## 3920 0.3034 nan 0.0010 0.0000
## 3940 0.3014 nan 0.0010 -0.0000
## 3960 0.2995 nan 0.0010 -0.0000
## 3980 0.2976 nan 0.0010 -0.0001
## 4000 0.2959 nan 0.0010 -0.0000
## 4020 0.2940 nan 0.0010 -0.0001
## 4040 0.2920 nan 0.0010 0.0000
## 4060 0.2901 nan 0.0010 -0.0000
## 4080 0.2884 nan 0.0010 -0.0000
## 4100 0.2870 nan 0.0010 -0.0000
## 4120 0.2852 nan 0.0010 0.0000
## 4140 0.2834 nan 0.0010 -0.0000
## 4160 0.2816 nan 0.0010 -0.0000
## 4180 0.2799 nan 0.0010 -0.0000
## 4200 0.2778 nan 0.0010 -0.0000
## 4220 0.2762 nan 0.0010 -0.0000
## 4240 0.2744 nan 0.0010 -0.0000
## 4260 0.2728 nan 0.0010 -0.0000
## 4280 0.2713 nan 0.0010 -0.0000
## 4300 0.2697 nan 0.0010 -0.0000
## 4320 0.2683 nan 0.0010 -0.0000
## 4340 0.2667 nan 0.0010 -0.0000
## 4360 0.2652 nan 0.0010 -0.0000
## 4380 0.2635 nan 0.0010 -0.0000
## 4400 0.2619 nan 0.0010 -0.0000
## 4420 0.2603 nan 0.0010 -0.0000
## 4440 0.2587 nan 0.0010 -0.0000
## 4460 0.2571 nan 0.0010 -0.0000
## 4480 0.2556 nan 0.0010 -0.0000
## 4500 0.2540 nan 0.0010 -0.0000
## 4520 0.2524 nan 0.0010 -0.0000
## 4540 0.2509 nan 0.0010 0.0000
## 4560 0.2492 nan 0.0010 -0.0000
## 4580 0.2480 nan 0.0010 -0.0000
## 4600 0.2464 nan 0.0010 -0.0000
## 4620 0.2449 nan 0.0010 0.0000
## 4640 0.2435 nan 0.0010 -0.0000
## 4660 0.2420 nan 0.0010 -0.0000
## 4680 0.2405 nan 0.0010 -0.0000
## 4700 0.2391 nan 0.0010 -0.0000
## 4720 0.2375 nan 0.0010 0.0000
## 4740 0.2363 nan 0.0010 -0.0000
## 4760 0.2351 nan 0.0010 -0.0000
## 4780 0.2338 nan 0.0010 -0.0000
## 4800 0.2324 nan 0.0010 -0.0000
## 4820 0.2309 nan 0.0010 -0.0000
## 4840 0.2295 nan 0.0010 0.0000
## 4860 0.2281 nan 0.0010 -0.0000
## 4880 0.2267 nan 0.0010 -0.0000
## 4900 0.2251 nan 0.0010 -0.0000
## 4920 0.2238 nan 0.0010 -0.0000
## 4940 0.2226 nan 0.0010 -0.0001
## 4960 0.2213 nan 0.0010 -0.0000
## 4980 0.2200 nan 0.0010 -0.0000
## 5000 0.2186 nan 0.0010 -0.0000
## 5020 0.2173 nan 0.0010 -0.0000
## 5040 0.2161 nan 0.0010 -0.0000
## 5060 0.2146 nan 0.0010 -0.0000
## 5080 0.2133 nan 0.0010 -0.0000
## 5100 0.2120 nan 0.0010 -0.0000
## 5120 0.2105 nan 0.0010 -0.0000
## 5140 0.2093 nan 0.0010 -0.0000
## 5160 0.2081 nan 0.0010 -0.0000
## 5180 0.2070 nan 0.0010 -0.0000
## 5200 0.2057 nan 0.0010 -0.0000
## 5220 0.2046 nan 0.0010 -0.0000
## 5240 0.2035 nan 0.0010 -0.0000
## 5260 0.2023 nan 0.0010 -0.0000
## 5280 0.2011 nan 0.0010 -0.0000
## 5300 0.2002 nan 0.0010 -0.0000
## 5320 0.1991 nan 0.0010 -0.0000
## 5340 0.1980 nan 0.0010 -0.0000
## 5360 0.1966 nan 0.0010 -0.0000
## 5380 0.1954 nan 0.0010 -0.0000
## 5400 0.1942 nan 0.0010 -0.0000
## 5420 0.1930 nan 0.0010 -0.0000
## 5440 0.1919 nan 0.0010 -0.0000
## 5460 0.1909 nan 0.0010 -0.0000
## 5480 0.1898 nan 0.0010 -0.0000
## 5500 0.1887 nan 0.0010 -0.0000
## 5520 0.1876 nan 0.0010 -0.0000
## 5540 0.1867 nan 0.0010 -0.0000
## 5560 0.1857 nan 0.0010 -0.0000
## 5580 0.1846 nan 0.0010 -0.0000
## 5600 0.1835 nan 0.0010 -0.0000
## 5620 0.1825 nan 0.0010 -0.0000
## 5640 0.1814 nan 0.0010 -0.0000
## 5660 0.1803 nan 0.0010 0.0000
## 5680 0.1793 nan 0.0010 -0.0000
## 5700 0.1781 nan 0.0010 -0.0000
## 5720 0.1772 nan 0.0010 -0.0000
## 5740 0.1762 nan 0.0010 -0.0000
## 5760 0.1751 nan 0.0010 -0.0000
## 5780 0.1741 nan 0.0010 -0.0000
## 5800 0.1730 nan 0.0010 -0.0000
## 5820 0.1719 nan 0.0010 -0.0000
## 5840 0.1710 nan 0.0010 -0.0000
## 5860 0.1701 nan 0.0010 -0.0000
## 5880 0.1693 nan 0.0010 -0.0000
## 5900 0.1682 nan 0.0010 -0.0000
## 5920 0.1672 nan 0.0010 -0.0000
## 5940 0.1663 nan 0.0010 -0.0000
## 5960 0.1654 nan 0.0010 -0.0000
## 5980 0.1645 nan 0.0010 -0.0000
## 6000 0.1636 nan 0.0010 -0.0000
## 6020 0.1627 nan 0.0010 -0.0000
## 6040 0.1618 nan 0.0010 -0.0000
## 6060 0.1608 nan 0.0010 -0.0000
## 6080 0.1601 nan 0.0010 -0.0000
## 6100 0.1592 nan 0.0010 -0.0000
## 6120 0.1583 nan 0.0010 -0.0000
## 6140 0.1574 nan 0.0010 -0.0000
## 6160 0.1564 nan 0.0010 -0.0000
## 6180 0.1556 nan 0.0010 -0.0000
## 6200 0.1546 nan 0.0010 -0.0000
## 6220 0.1537 nan 0.0010 -0.0000
## 6240 0.1529 nan 0.0010 -0.0000
## 6260 0.1520 nan 0.0010 -0.0000
## 6280 0.1511 nan 0.0010 -0.0000
## 6300 0.1503 nan 0.0010 0.0000
## 6320 0.1495 nan 0.0010 0.0000
## 6340 0.1485 nan 0.0010 -0.0000
## 6360 0.1477 nan 0.0010 -0.0000
## 6380 0.1468 nan 0.0010 0.0000
## 6400 0.1460 nan 0.0010 -0.0000
## 6420 0.1452 nan 0.0010 -0.0000
## 6440 0.1444 nan 0.0010 0.0000
## 6460 0.1435 nan 0.0010 -0.0000
## 6480 0.1426 nan 0.0010 -0.0000
## 6500 0.1417 nan 0.0010 0.0000
## 6520 0.1410 nan 0.0010 -0.0000
## 6540 0.1402 nan 0.0010 -0.0000
## 6560 0.1395 nan 0.0010 -0.0000
## 6580 0.1387 nan 0.0010 -0.0000
## 6600 0.1380 nan 0.0010 -0.0000
## 6620 0.1371 nan 0.0010 -0.0000
## 6640 0.1364 nan 0.0010 -0.0000
## 6660 0.1356 nan 0.0010 -0.0000
## 6680 0.1349 nan 0.0010 -0.0000
## 6700 0.1342 nan 0.0010 -0.0000
## 6720 0.1335 nan 0.0010 -0.0000
## 6740 0.1327 nan 0.0010 -0.0000
## 6760 0.1319 nan 0.0010 -0.0000
## 6780 0.1311 nan 0.0010 -0.0000
## 6800 0.1304 nan 0.0010 -0.0000
## 6820 0.1297 nan 0.0010 -0.0000
## 6840 0.1290 nan 0.0010 -0.0000
## 6860 0.1283 nan 0.0010 -0.0000
## 6880 0.1277 nan 0.0010 -0.0000
## 6900 0.1269 nan 0.0010 0.0000
## 6920 0.1262 nan 0.0010 -0.0000
## 6940 0.1255 nan 0.0010 -0.0000
## 6960 0.1247 nan 0.0010 -0.0000
## 6980 0.1239 nan 0.0010 0.0000
## 7000 0.1231 nan 0.0010 -0.0000
## 7020 0.1225 nan 0.0010 -0.0000
## 7040 0.1219 nan 0.0010 -0.0000
## 7060 0.1212 nan 0.0010 -0.0000
## 7080 0.1206 nan 0.0010 -0.0000
## 7100 0.1199 nan 0.0010 -0.0000
## 7120 0.1193 nan 0.0010 0.0000
## 7140 0.1186 nan 0.0010 -0.0000
## 7160 0.1180 nan 0.0010 -0.0000
## 7180 0.1173 nan 0.0010 -0.0000
## 7200 0.1167 nan 0.0010 -0.0000
## 7220 0.1161 nan 0.0010 -0.0000
## 7240 0.1155 nan 0.0010 -0.0000
## 7260 0.1148 nan 0.0010 -0.0000
## 7280 0.1142 nan 0.0010 -0.0000
## 7300 0.1137 nan 0.0010 -0.0000
## 7320 0.1130 nan 0.0010 -0.0000
## 7340 0.1123 nan 0.0010 0.0000
## 7360 0.1117 nan 0.0010 -0.0000
## 7380 0.1110 nan 0.0010 -0.0000
## 7400 0.1104 nan 0.0010 -0.0000
## 7420 0.1097 nan 0.0010 0.0000
## 7440 0.1091 nan 0.0010 -0.0000
## 7460 0.1085 nan 0.0010 -0.0000
## 7480 0.1079 nan 0.0010 0.0000
## 7500 0.1073 nan 0.0010 -0.0000
## 7520 0.1067 nan 0.0010 -0.0000
## 7540 0.1062 nan 0.0010 -0.0000
## 7560 0.1056 nan 0.0010 -0.0000
## 7580 0.1050 nan 0.0010 -0.0000
## 7600 0.1045 nan 0.0010 -0.0000
## 7620 0.1038 nan 0.0010 -0.0000
## 7640 0.1032 nan 0.0010 -0.0000
## 7660 0.1027 nan 0.0010 -0.0000
## 7680 0.1021 nan 0.0010 0.0000
## 7700 0.1015 nan 0.0010 -0.0000
## 7720 0.1010 nan 0.0010 -0.0000
## 7740 0.1004 nan 0.0010 0.0000
## 7760 0.0999 nan 0.0010 -0.0000
## 7780 0.0994 nan 0.0010 -0.0000
## 7800 0.0989 nan 0.0010 -0.0000
## 7820 0.0984 nan 0.0010 -0.0000
## 7840 0.0979 nan 0.0010 -0.0000
## 7860 0.0973 nan 0.0010 -0.0000
## 7880 0.0969 nan 0.0010 -0.0000
## 7900 0.0964 nan 0.0010 -0.0000
## 7920 0.0958 nan 0.0010 -0.0000
## 7940 0.0954 nan 0.0010 -0.0000
## 7960 0.0948 nan 0.0010 0.0000
## 7980 0.0943 nan 0.0010 -0.0000
## 8000 0.0938 nan 0.0010 -0.0000
## 8020 0.0932 nan 0.0010 0.0000
## 8040 0.0927 nan 0.0010 -0.0000
## 8060 0.0922 nan 0.0010 0.0000
## 8080 0.0917 nan 0.0010 0.0000
## 8100 0.0911 nan 0.0010 -0.0000
## 8120 0.0907 nan 0.0010 -0.0000
## 8140 0.0902 nan 0.0010 -0.0000
## 8160 0.0897 nan 0.0010 -0.0000
## 8180 0.0893 nan 0.0010 -0.0000
## 8200 0.0887 nan 0.0010 0.0000
## 8220 0.0883 nan 0.0010 -0.0000
## 8240 0.0878 nan 0.0010 -0.0000
## 8260 0.0873 nan 0.0010 -0.0000
## 8280 0.0869 nan 0.0010 -0.0000
## 8300 0.0864 nan 0.0010 -0.0000
## 8320 0.0860 nan 0.0010 -0.0000
## 8340 0.0857 nan 0.0010 -0.0000
## 8360 0.0852 nan 0.0010 -0.0000
## 8380 0.0848 nan 0.0010 -0.0000
## 8400 0.0843 nan 0.0010 -0.0000
## 8420 0.0839 nan 0.0010 -0.0000
## 8440 0.0835 nan 0.0010 -0.0000
## 8460 0.0831 nan 0.0010 -0.0000
## 8480 0.0826 nan 0.0010 -0.0000
## 8500 0.0822 nan 0.0010 -0.0000
## 8520 0.0818 nan 0.0010 -0.0000
## 8540 0.0814 nan 0.0010 -0.0000
## 8560 0.0810 nan 0.0010 -0.0000
## 8580 0.0806 nan 0.0010 -0.0000
## 8600 0.0801 nan 0.0010 -0.0000
## 8620 0.0796 nan 0.0010 -0.0000
## 8640 0.0792 nan 0.0010 -0.0000
## 8660 0.0788 nan 0.0010 -0.0000
## 8680 0.0784 nan 0.0010 -0.0000
## 8700 0.0780 nan 0.0010 -0.0000
## 8720 0.0775 nan 0.0010 -0.0000
## 8740 0.0771 nan 0.0010 0.0000
## 8760 0.0767 nan 0.0010 -0.0000
## 8780 0.0763 nan 0.0010 -0.0000
## 8800 0.0759 nan 0.0010 -0.0000
## 8820 0.0755 nan 0.0010 0.0000
## 8840 0.0750 nan 0.0010 -0.0000
## 8860 0.0747 nan 0.0010 -0.0000
## 8880 0.0743 nan 0.0010 -0.0000
## 8900 0.0739 nan 0.0010 -0.0000
## 8920 0.0735 nan 0.0010 -0.0000
## 8940 0.0732 nan 0.0010 -0.0000
## 8960 0.0728 nan 0.0010 -0.0000
## 8980 0.0724 nan 0.0010 -0.0000
## 9000 0.0720 nan 0.0010 -0.0000
## 9020 0.0717 nan 0.0010 -0.0000
## 9040 0.0713 nan 0.0010 -0.0000
## 9060 0.0710 nan 0.0010 -0.0000
## 9080 0.0706 nan 0.0010 -0.0000
## 9100 0.0702 nan 0.0010 -0.0000
## 9120 0.0698 nan 0.0010 -0.0000
## 9140 0.0695 nan 0.0010 -0.0000
## 9160 0.0692 nan 0.0010 -0.0000
## 9180 0.0688 nan 0.0010 -0.0000
## 9200 0.0684 nan 0.0010 -0.0000
## 9220 0.0680 nan 0.0010 -0.0000
## 9240 0.0676 nan 0.0010 0.0000
## 9260 0.0673 nan 0.0010 -0.0000
## 9280 0.0669 nan 0.0010 -0.0000
## 9300 0.0666 nan 0.0010 -0.0000
## 9320 0.0662 nan 0.0010 -0.0000
## 9340 0.0659 nan 0.0010 -0.0000
## 9360 0.0655 nan 0.0010 -0.0000
## 9380 0.0652 nan 0.0010 -0.0000
## 9400 0.0649 nan 0.0010 -0.0000
## 9420 0.0646 nan 0.0010 -0.0000
## 9440 0.0643 nan 0.0010 -0.0000
## 9460 0.0640 nan 0.0010 -0.0000
## 9480 0.0636 nan 0.0010 -0.0000
## 9500 0.0633 nan 0.0010 -0.0000
## 9520 0.0630 nan 0.0010 -0.0000
## 9540 0.0626 nan 0.0010 -0.0000
## 9560 0.0623 nan 0.0010 -0.0000
## 9580 0.0620 nan 0.0010 -0.0000
## 9600 0.0617 nan 0.0010 -0.0000
## 9620 0.0614 nan 0.0010 -0.0000
## 9640 0.0612 nan 0.0010 -0.0000
## 9660 0.0609 nan 0.0010 -0.0000
## 9680 0.0605 nan 0.0010 -0.0000
## 9700 0.0602 nan 0.0010 -0.0000
## 9720 0.0599 nan 0.0010 -0.0000
## 9740 0.0596 nan 0.0010 -0.0000
## 9760 0.0592 nan 0.0010 -0.0000
## 9780 0.0589 nan 0.0010 -0.0000
## 9800 0.0586 nan 0.0010 -0.0000
## 9820 0.0582 nan 0.0010 -0.0000
## 9840 0.0579 nan 0.0010 -0.0000
## 9860 0.0576 nan 0.0010 -0.0000
## 9880 0.0573 nan 0.0010 -0.0000
## 9900 0.0570 nan 0.0010 -0.0000
## 9920 0.0568 nan 0.0010 -0.0000
## 9940 0.0565 nan 0.0010 -0.0000
## 9960 0.0562 nan 0.0010 -0.0000
## 9980 0.0558 nan 0.0010 0.0000
## 10000 0.0556 nan 0.0010 -0.0000
## 10020 0.0552 nan 0.0010 0.0000
## 10040 0.0549 nan 0.0010 0.0000
## 10060 0.0547 nan 0.0010 -0.0000
## 10080 0.0544 nan 0.0010 -0.0000
## 10100 0.0541 nan 0.0010 -0.0000
## 10120 0.0538 nan 0.0010 -0.0000
## 10140 0.0535 nan 0.0010 -0.0000
## 10160 0.0532 nan 0.0010 -0.0000
## 10180 0.0529 nan 0.0010 -0.0000
## 10200 0.0526 nan 0.0010 -0.0000
## 10220 0.0524 nan 0.0010 -0.0000
## 10240 0.0521 nan 0.0010 -0.0000
## 10260 0.0518 nan 0.0010 -0.0000
## 10280 0.0515 nan 0.0010 -0.0000
## 10300 0.0513 nan 0.0010 -0.0000
## 10320 0.0510 nan 0.0010 -0.0000
## 10340 0.0508 nan 0.0010 -0.0000
## 10360 0.0505 nan 0.0010 -0.0000
## 10380 0.0503 nan 0.0010 -0.0000
## 10400 0.0500 nan 0.0010 -0.0000
## 10420 0.0498 nan 0.0010 -0.0000
## 10440 0.0496 nan 0.0010 -0.0000
## 10460 0.0493 nan 0.0010 -0.0000
## 10480 0.0490 nan 0.0010 -0.0000
## 10500 0.0488 nan 0.0010 -0.0000
## 10520 0.0486 nan 0.0010 -0.0000
## 10540 0.0483 nan 0.0010 -0.0000
## 10560 0.0480 nan 0.0010 0.0000
## 10580 0.0478 nan 0.0010 -0.0000
## 10600 0.0475 nan 0.0010 -0.0000
## 10620 0.0473 nan 0.0010 -0.0000
## 10640 0.0471 nan 0.0010 -0.0000
## 10660 0.0469 nan 0.0010 -0.0000
## 10680 0.0466 nan 0.0010 -0.0000
## 10700 0.0464 nan 0.0010 -0.0000
## 10720 0.0462 nan 0.0010 -0.0000
## 10740 0.0459 nan 0.0010 -0.0000
## 10760 0.0457 nan 0.0010 -0.0000
## 10780 0.0455 nan 0.0010 -0.0000
## 10800 0.0452 nan 0.0010 -0.0000
## 10820 0.0450 nan 0.0010 -0.0000
## 10840 0.0448 nan 0.0010 -0.0000
## 10860 0.0445 nan 0.0010 -0.0000
## 10880 0.0442 nan 0.0010 -0.0000
## 10900 0.0440 nan 0.0010 -0.0000
## 10920 0.0438 nan 0.0010 -0.0000
## 10940 0.0436 nan 0.0010 -0.0000
## 10960 0.0434 nan 0.0010 -0.0000
## 10980 0.0432 nan 0.0010 -0.0000
## 11000 0.0430 nan 0.0010 0.0000
## 11020 0.0427 nan 0.0010 -0.0000
## 11040 0.0425 nan 0.0010 -0.0000
## 11060 0.0423 nan 0.0010 -0.0000
## 11080 0.0421 nan 0.0010 -0.0000
## 11100 0.0419 nan 0.0010 -0.0000
## 11120 0.0416 nan 0.0010 -0.0000
## 11140 0.0414 nan 0.0010 -0.0000
## 11160 0.0412 nan 0.0010 -0.0000
## 11180 0.0410 nan 0.0010 -0.0000
## 11200 0.0408 nan 0.0010 -0.0000
## 11220 0.0406 nan 0.0010 -0.0000
## 11240 0.0404 nan 0.0010 -0.0000
## 11260 0.0402 nan 0.0010 -0.0000
## 11280 0.0401 nan 0.0010 -0.0000
## 11300 0.0399 nan 0.0010 -0.0000
## 11320 0.0397 nan 0.0010 -0.0000
## 11340 0.0395 nan 0.0010 -0.0000
## 11360 0.0393 nan 0.0010 -0.0000
## 11380 0.0391 nan 0.0010 -0.0000
## 11400 0.0389 nan 0.0010 -0.0000
## 11420 0.0387 nan 0.0010 -0.0000
## 11440 0.0385 nan 0.0010 -0.0000
## 11460 0.0383 nan 0.0010 -0.0000
## 11480 0.0381 nan 0.0010 -0.0000
## 11500 0.0379 nan 0.0010 0.0000
## 11520 0.0378 nan 0.0010 -0.0000
## 11540 0.0376 nan 0.0010 -0.0000
## 11560 0.0374 nan 0.0010 -0.0000
## 11580 0.0372 nan 0.0010 -0.0000
## 11600 0.0370 nan 0.0010 -0.0000
## 11620 0.0368 nan 0.0010 -0.0000
## 11640 0.0366 nan 0.0010 -0.0000
## 11660 0.0364 nan 0.0010 -0.0000
## 11680 0.0362 nan 0.0010 -0.0000
## 11700 0.0361 nan 0.0010 -0.0000
## 11720 0.0359 nan 0.0010 -0.0000
## 11740 0.0357 nan 0.0010 -0.0000
## 11760 0.0355 nan 0.0010 0.0000
## 11780 0.0353 nan 0.0010 -0.0000
## 11800 0.0351 nan 0.0010 -0.0000
## 11820 0.0349 nan 0.0010 -0.0000
## 11840 0.0348 nan 0.0010 -0.0000
## 11860 0.0346 nan 0.0010 -0.0000
## 11880 0.0344 nan 0.0010 -0.0000
## 11900 0.0342 nan 0.0010 -0.0000
## 11920 0.0341 nan 0.0010 -0.0000
## 11940 0.0339 nan 0.0010 -0.0000
## 11960 0.0337 nan 0.0010 -0.0000
## 11980 0.0335 nan 0.0010 -0.0000
## 12000 0.0334 nan 0.0010 -0.0000
## 12020 0.0332 nan 0.0010 -0.0000
## 12040 0.0330 nan 0.0010 -0.0000
## 12060 0.0329 nan 0.0010 -0.0000
## 12080 0.0327 nan 0.0010 -0.0000
## 12100 0.0325 nan 0.0010 -0.0000
## 12120 0.0324 nan 0.0010 -0.0000
## 12140 0.0322 nan 0.0010 -0.0000
## 12160 0.0321 nan 0.0010 -0.0000
## 12180 0.0319 nan 0.0010 -0.0000
## 12200 0.0317 nan 0.0010 0.0000
## 12220 0.0316 nan 0.0010 -0.0000
## 12240 0.0315 nan 0.0010 -0.0000
## 12260 0.0313 nan 0.0010 -0.0000
## 12280 0.0312 nan 0.0010 -0.0000
## 12300 0.0310 nan 0.0010 -0.0000
## 12320 0.0309 nan 0.0010 -0.0000
## 12340 0.0308 nan 0.0010 -0.0000
## 12360 0.0306 nan 0.0010 -0.0000
## 12380 0.0305 nan 0.0010 -0.0000
## 12400 0.0303 nan 0.0010 -0.0000
## 12420 0.0302 nan 0.0010 -0.0000
## 12440 0.0300 nan 0.0010 -0.0000
## 12460 0.0299 nan 0.0010 -0.0000
## 12480 0.0297 nan 0.0010 -0.0000
## 12500 0.0295 nan 0.0010 -0.0000
## 12520 0.0294 nan 0.0010 0.0000
## 12540 0.0293 nan 0.0010 -0.0000
## 12560 0.0291 nan 0.0010 -0.0000
## 12580 0.0290 nan 0.0010 -0.0000
## 12600 0.0288 nan 0.0010 -0.0000
## 12620 0.0287 nan 0.0010 -0.0000
## 12640 0.0286 nan 0.0010 -0.0000
## 12660 0.0284 nan 0.0010 -0.0000
## 12680 0.0283 nan 0.0010 0.0000
## 12700 0.0281 nan 0.0010 -0.0000
## 12720 0.0280 nan 0.0010 -0.0000
## 12740 0.0278 nan 0.0010 -0.0000
## 12760 0.0277 nan 0.0010 -0.0000
## 12780 0.0276 nan 0.0010 -0.0000
## 12800 0.0275 nan 0.0010 -0.0000
## 12820 0.0274 nan 0.0010 -0.0000
## 12840 0.0272 nan 0.0010 -0.0000
## 12860 0.0271 nan 0.0010 -0.0000
## 12880 0.0270 nan 0.0010 0.0000
## 12900 0.0269 nan 0.0010 -0.0000
## 12920 0.0267 nan 0.0010 -0.0000
## 12940 0.0266 nan 0.0010 -0.0000
## 12960 0.0265 nan 0.0010 -0.0000
## 12980 0.0264 nan 0.0010 -0.0000
## 13000 0.0262 nan 0.0010 -0.0000
## 13020 0.0261 nan 0.0010 -0.0000
## 13040 0.0260 nan 0.0010 -0.0000
## 13060 0.0259 nan 0.0010 -0.0000
## 13080 0.0258 nan 0.0010 -0.0000
## 13100 0.0256 nan 0.0010 -0.0000
## 13120 0.0255 nan 0.0010 -0.0000
## 13140 0.0254 nan 0.0010 -0.0000
## 13160 0.0253 nan 0.0010 -0.0000
## 13180 0.0251 nan 0.0010 -0.0000
## 13200 0.0250 nan 0.0010 -0.0000
## 13220 0.0249 nan 0.0010 -0.0000
## 13240 0.0247 nan 0.0010 -0.0000
## 13260 0.0246 nan 0.0010 -0.0000
## 13280 0.0245 nan 0.0010 -0.0000
## 13300 0.0244 nan 0.0010 -0.0000
## 13320 0.0243 nan 0.0010 -0.0000
## 13340 0.0242 nan 0.0010 -0.0000
## 13360 0.0240 nan 0.0010 -0.0000
## 13380 0.0239 nan 0.0010 -0.0000
## 13400 0.0238 nan 0.0010 -0.0000
## 13420 0.0237 nan 0.0010 -0.0000
## 13440 0.0235 nan 0.0010 -0.0000
## 13460 0.0234 nan 0.0010 -0.0000
## 13480 0.0233 nan 0.0010 -0.0000
## 13500 0.0232 nan 0.0010 -0.0000
## 13520 0.0231 nan 0.0010 -0.0000
## 13540 0.0230 nan 0.0010 -0.0000
## 13560 0.0228 nan 0.0010 -0.0000
## 13580 0.0227 nan 0.0010 -0.0000
## 13600 0.0226 nan 0.0010 -0.0000
## 13620 0.0225 nan 0.0010 -0.0000
## 13640 0.0224 nan 0.0010 -0.0000
## 13660 0.0223 nan 0.0010 -0.0000
## 13680 0.0222 nan 0.0010 -0.0000
## 13700 0.0221 nan 0.0010 -0.0000
## 13720 0.0220 nan 0.0010 0.0000
## 13740 0.0219 nan 0.0010 -0.0000
## 13760 0.0218 nan 0.0010 -0.0000
## 13780 0.0217 nan 0.0010 -0.0000
## 13800 0.0215 nan 0.0010 -0.0000
## 13820 0.0214 nan 0.0010 -0.0000
## 13840 0.0213 nan 0.0010 -0.0000
## 13860 0.0212 nan 0.0010 -0.0000
## 13880 0.0211 nan 0.0010 0.0000
## 13900 0.0210 nan 0.0010 -0.0000
## 13920 0.0209 nan 0.0010 -0.0000
## 13940 0.0208 nan 0.0010 -0.0000
## 13960 0.0207 nan 0.0010 -0.0000
## 13980 0.0206 nan 0.0010 -0.0000
## 14000 0.0205 nan 0.0010 -0.0000
## 14020 0.0204 nan 0.0010 -0.0000
## 14040 0.0203 nan 0.0010 -0.0000
## 14060 0.0202 nan 0.0010 -0.0000
## 14080 0.0201 nan 0.0010 -0.0000
## 14100 0.0200 nan 0.0010 0.0000
## 14120 0.0199 nan 0.0010 -0.0000
## 14140 0.0198 nan 0.0010 -0.0000
## 14160 0.0197 nan 0.0010 -0.0000
## 14180 0.0196 nan 0.0010 -0.0000
## 14200 0.0195 nan 0.0010 -0.0000
## 14220 0.0194 nan 0.0010 -0.0000
## 14240 0.0193 nan 0.0010 -0.0000
## 14260 0.0192 nan 0.0010 -0.0000
## 14280 0.0191 nan 0.0010 -0.0000
## 14300 0.0191 nan 0.0010 -0.0000
## 14320 0.0190 nan 0.0010 -0.0000
## 14340 0.0189 nan 0.0010 -0.0000
## 14360 0.0188 nan 0.0010 -0.0000
## 14380 0.0187 nan 0.0010 -0.0000
## 14400 0.0186 nan 0.0010 -0.0000
## 14420 0.0185 nan 0.0010 -0.0000
## 14440 0.0184 nan 0.0010 -0.0000
## 14460 0.0183 nan 0.0010 -0.0000
## 14480 0.0183 nan 0.0010 -0.0000
## 14500 0.0182 nan 0.0010 -0.0000
## 14520 0.0181 nan 0.0010 -0.0000
## 14540 0.0180 nan 0.0010 -0.0000
## 14560 0.0179 nan 0.0010 -0.0000
## 14580 0.0178 nan 0.0010 -0.0000
## 14600 0.0177 nan 0.0010 -0.0000
## 14620 0.0176 nan 0.0010 -0.0000
## 14640 0.0176 nan 0.0010 -0.0000
## 14660 0.0175 nan 0.0010 -0.0000
## 14680 0.0174 nan 0.0010 0.0000
## 14700 0.0173 nan 0.0010 -0.0000
## 14720 0.0172 nan 0.0010 -0.0000
## 14740 0.0171 nan 0.0010 -0.0000
## 14760 0.0171 nan 0.0010 -0.0000
## 14780 0.0170 nan 0.0010 -0.0000
## 14800 0.0169 nan 0.0010 -0.0000
## 14820 0.0168 nan 0.0010 -0.0000
## 14840 0.0167 nan 0.0010 -0.0000
## 14860 0.0167 nan 0.0010 -0.0000
## 14880 0.0166 nan 0.0010 -0.0000
## 14900 0.0165 nan 0.0010 -0.0000
## 14920 0.0164 nan 0.0010 -0.0000
## 14940 0.0163 nan 0.0010 -0.0000
## 14960 0.0162 nan 0.0010 -0.0000
## 14980 0.0162 nan 0.0010 -0.0000
## 15000 0.0161 nan 0.0010 -0.0000
# Persist the fitted model
save(gbmtest, file = "gbmtest.RCa")
# Check performance using the cross-validation folds of the fit (cv.folds = 10).
# method = "cv" gives the optimal number of trees based on cv performance;
# other methods will over or under predict
best.iter <- gbm.perf(gbmtest, plot.it = FALSE, method = "cv")
print(best.iter)
## [1] 2897
# Training error per boosting iteration: one row per tree, drawn as a line
# plot, shown, and written to a JPEG file
gbm_error <- data.frame(
  train.error = gbmtest$train.error,
  trees = seq_len(n.trees)
)
plot <- ggplot(data = gbm_error, mapping = aes(x = trees, y = train.error)) +
  geom_line()
plot
ggsave("deviance_enviro_vector.jpg", plot = plot)
## Saving 7 x 5 in image
# Stop the clock: time elapsed since `ptm` was taken, divided by 60
(proc.time() - ptm) / 60
## user system elapsed
## 0.063916667 0.005033333 0.202616667
# Reload the saved model and recompute the tree count chosen by cross-validation
load("gbmtest.RCa")
best.iter <- gbm.perf(gbmtest, plot.it = FALSE, method = "cv")
# Predictions on the TRAINING SET (response scale), paired with the observed
# Species_no values and labelled with the training row names
output <- cbind(
  output = predict(gbmtest, newdata = Train, n.trees = best.iter, type = "response"),
  data = Train$Species_no
)
rownames(output) <- rownames(Train)
# Sort rows so the largest predicted values come first
output <- output[order(-output[, "output"]), ]
# AUC for Bernoulli distributed responses (also draws the ROC curve)
par(mar = rep(1, 4))
auc <- colAUC(output[, "output"], output[, "data"], plotROC = TRUE)
print(auc)
## [,1]
## 0 vs. 1 1
# ROC curve (true positive rate against false positive rate) for the
# predictions currently held in `output`, with a diagonal reference line
pred <- prediction(predictions = output[, 1], labels = output[, 2])
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
par(mar = rep(1, 4))
plot(perf, main = "ROC full model", colorize = TRUE)
abline(a = 0, b = 1)
# Predictions on the Test SET (response scale)
output <- predict(gbmtest, newdata = Test, type = "response", n.trees = best.iter)
# Hard 0/1 call stored on the Test rows: the prediction passed through round()
Test$pred <- round(output)
# Pair predictions with the observed Species_no values, label the rows and
# sort so the largest predicted values come first
output <- cbind(output = output, data = Test$Species_no)
rownames(output) <- rownames(Test)
output <- output[order(-output[, "output"]), ]
# AUC for Bernoulli distributed responses (also draws the ROC curve)
par(mar = rep(1, 4))
auc <- colAUC(output[, "output"], output[, "data"], plotROC = TRUE)
print(auc)
## [,1]
## 0 vs. 1 0.6041667
# ROC curve (true positive rate against false positive rate) for the
# predictions currently held in `output`
pred <- prediction(predictions = output[, 1], labels = output[, 2])
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
par(mar = rep(1, 4))
plot(perf, main = "ROC full model test data", colorize = TRUE)
# Confusion matrix -- Test set.
# Both factors are built with the same explicit levels (0, 1). factor() on its
# own takes its levels from the values present, so if the model predicted only
# one class for these rows (or the hold-out rows contained only one class) the
# two factors would not share the same two levels.
# NOTE(review): with the default `positive` argument the first level ("0") is
# reported as the positive class — confirm that is the intended class.
confusionMatrix(data = factor(Test$pred, levels = c(0, 1)),
                reference = factor(Test$Species_no, levels = c(0, 1)),
                mode = "everything")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 4 4
## 1 2 4
##
## Accuracy : 0.5714
## 95% CI : (0.2886, 0.8234)
## No Information Rate : 0.5714
## P-Value [Acc > NIR] : 0.6105
##
## Kappa : 0.16
##
## Mcnemar's Test P-Value : 0.6831
##
## Sensitivity : 0.6667
## Specificity : 0.5000
## Pos Pred Value : 0.5000
## Neg Pred Value : 0.6667
## Precision : 0.5000
## Recall : 0.6667
## F1 : 0.5714
## Prevalence : 0.4286
## Detection Rate : 0.2857
## Detection Prevalence : 0.5714
## Balanced Accuracy : 0.5833
##
## 'Positive' Class : 0
##
# inds_observed_1_predicted_0 = which(Test$Species_no == 1 & Test$pred ==0)
#
# Test$Matches[inds_observed_1_predicted_0]
#
# inds_observed_0_predicted_1 = which(Test$Species_no == 0 & Test$pred ==1)
# Test$Matches[inds_observed_0_predicted_1]
# Predictions on the Train SET (response scale), turned into hard 0/1 calls
# with round() and stored on the Train rows
output <- predict(gbmtest,
                  newdata = Train,
                  n.trees = best.iter,
                  type = "response")
Train$pred <- round(output)
# Confusion matrix -- Train set.
# Both factors are built with the same explicit levels (0, 1). factor() on its
# own takes its levels from the values present, so a prediction vector that
# contained a single class would yield a factor with only one level.
confusionMatrix(data = factor(Train$pred, levels = c(0, 1)),
                reference = factor(Train$Species_no, levels = c(0, 1)),
                mode = "everything")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 30 0
## 1 0 26
##
## Accuracy : 1
## 95% CI : (0.9362, 1)
## No Information Rate : 0.5357
## P-Value [Acc > NIR] : 6.611e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Precision : 1.0000
## Recall : 1.0000
## F1 : 1.0000
## Prevalence : 0.5357
## Detection Rate : 0.5357
## Detection Prevalence : 0.5357
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : 0
##
### permute labels and find AUC – bootstrap
# Reload the prepared data table
load("C.Rdata")
# Start the clock
ptm <- proc.time()
# Empty accumulators and the counter used by the while loop that follows
permutedAUC <- NULL
best.iter.list <- NULL
word <- "binomial"
i <- 1
while (i <= 50) {
# for permutation loop
## random permutation of Label
randomLabel<-sample(C$Species_no)
pan2<-cbind(randomLabel,C)
#remove previous label
rm = "Species_no"
keep = setdiff(names(pan2),rm)
pan2 = pan2[,keep]
pan2[,1]<-sapply(pan2[,1],as.character)
## create training and test sets
intrain2<-createDataPartition(y=pan2$randomLabel,
p=0.8,
list=FALSE)
test2<-pan2[-intrain2,]
training2<-pan2[intrain2,]
check<-1-is.na(training2)*1
checksum<-apply(check,2,sum)
if(length(which(checksum>=2))==dim(training2)[2]){#this makes sure we don't get any columns with all zeros. Should be == to the number of columns
## random permutation of Labels ~ traits
label_col = which(names(training2)== "randomLabel")
names <- colnames(training2[,-c(label_col)])
y_col = label_col
model<-as.formula(paste(colnames(training2)[y_col], "~",
paste(names,collapse = "+"),
sep = ""))
gbm2<- gbm(model,
data=training2,
distribution="bernoulli",
n.trees=15000,
shrinkage=0.001,
interaction.depth=3,
bag.fraction=0.50,
train.fraction=1,
n.minobsinnode=3,
cv.folds=10,
keep.data=TRUE)
# verbose=TRUE)
#check performance using 5-fold cross-valiCion
best.iter2 <- gbm.perf(gbm2,method="cv",plot.it=FALSE) #OOB method under predicts
best.iter.list=c(best.iter.list, best.iter2)
# batsum2<-summary.gbm(gbm2,n.trees=best.iter,method=relative.influence,plotit=FALSE)
## LABEL
## predictions on the TRAINING SET
output2<-predict(gbm2, newdata=training2, n.trees=best.iter2, type="response")
output2<-cbind(output2,as.numeric(training2$randomLabel))
# colnames(output2)<-c("output","label")
# output2<-output2[order(-as.numeric(output2[,1])),]
# # training AUC for Bernoulli distributed responses
auc2=colAUC(output2[,1],output2[,2])
# Predictions on the TEST set
output.test2<-predict(gbm2, newdata=test2, n.trees=best.iter2, type="response")
output.test2<-cbind(output.test2,as.numeric(test2$randomLabel))
# colnames(output.test2)<-c("output","label")
# output.test2<-output.test2[order(-output.test2[,1]),]
# plot(output.test)
## test AUC for Bernoulli distributed responses
auctest2=colAUC(output.test2[,1],output.test2[,2])
permutedAUC[i]<-auctest2
print(auctest2)
i=i+1
print(i)#check where we are in bootstrap
} else i=i
}
## [,1]
## 0 vs. 1 0.547619
## [1] 2
## [,1]
## 0 vs. 1 0.5714286
## [1] 3
## [,1]
## 0 vs. 1 0.6666667
## [1] 4
## [,1]
## 0 vs. 1 0.9047619
## [1] 5
## [,1]
## 0 vs. 1 0.6666667
## [1] 6
## [,1]
## 0 vs. 1 0.6071429
## [1] 7
## [,1]
## 0 vs. 1 0.7142857
## [1] 8
## [,1]
## 0 vs. 1 0.7619048
## [1] 9
## [,1]
## 0 vs. 1 0.5
## [1] 10
## [,1]
## 0 vs. 1 0.5714286
## [1] 11
## [,1]
## 0 vs. 1 0.5952381
## [1] 12
## [,1]
## 0 vs. 1 0.547619
## [1] 13
## [,1]
## 0 vs. 1 0.7857143
## [1] 14
## [,1]
## 0 vs. 1 0.6428571
## [1] 15
## [,1]
## 0 vs. 1 0.6190476
## [1] 16
## [,1]
## 0 vs. 1 0.547619
## [1] 17
## [,1]
## 0 vs. 1 0.5952381
## [1] 18
## [,1]
## 0 vs. 1 0.5238095
## [1] 19
## [,1]
## 0 vs. 1 0.5952381
## [1] 20
## [,1]
## 0 vs. 1 0.5357143
## [1] 21
## [,1]
## 0 vs. 1 0.547619
## [1] 22
## [,1]
## 0 vs. 1 0.5714286
## [1] 23
## [,1]
## 0 vs. 1 0.6190476
## [1] 24
## [,1]
## 0 vs. 1 0.7261905
## [1] 25
## [,1]
## 0 vs. 1 0.5
## [1] 26
## [,1]
## 0 vs. 1 0.5238095
## [1] 27
## [,1]
## 0 vs. 1 0.6666667
## [1] 28
## [,1]
## 0 vs. 1 0.5
## [1] 29
## [,1]
## 0 vs. 1 0.5952381
## [1] 30
## [,1]
## 0 vs. 1 0.547619
## [1] 31
## [,1]
## 0 vs. 1 0.6666667
## [1] 32
## [,1]
## 0 vs. 1 0.5714286
## [1] 33
## [,1]
## 0 vs. 1 0.5
## [1] 34
## [,1]
## 0 vs. 1 0.6190476
## [1] 35
## [,1]
## 0 vs. 1 0.8095238
## [1] 36
## [,1]
## 0 vs. 1 0.547619
## [1] 37
## [,1]
## 0 vs. 1 0.5
## [1] 38
## [,1]
## 0 vs. 1 0.6666667
## [1] 39
## [,1]
## 0 vs. 1 0.702381
## [1] 40
## [,1]
## 0 vs. 1 0.547619
## [1] 41
## [,1]
## 0 vs. 1 0.7380952
## [1] 42
## [,1]
## 0 vs. 1 0.5714286
## [1] 43
## [,1]
## 0 vs. 1 0.5595238
## [1] 44
## [,1]
## 0 vs. 1 0.6190476
## [1] 45
## [,1]
## 0 vs. 1 0.5
## [1] 46
## [,1]
## 0 vs. 1 0.5238095
## [1] 47
## [,1]
## 0 vs. 1 0.5238095
## [1] 48
## [,1]
## 0 vs. 1 0.6190476
## [1] 49
## [,1]
## 0 vs. 1 0.7261905
## [1] 50
## [,1]
## 0 vs. 1 0.6190476
## [1] 51
# Number of runs whose test AUC is missing
sum(is.na(permutedAUC))
## [1] 0
# Mean and standard deviation of the test AUCs that are present
permutedAUC2 <- na.omit(permutedAUC)
mean(permutedAUC2)
## [1] 0.6085714
sd(permutedAUC2)
## [1] 0.09112981
# Stop the clock: time elapsed since ptm, divided by 60
(proc.time() - ptm) / 60
## user system elapsed
## 2.0804500 0.1201333 9.0096667
# Write the tree counts collected in best.iter.list to a CSV named after `word`
write.csv(best.iter.list,
          file = paste0("best.iter.list.", "AUC.", word, ".csv"))
### Plot relative influence
# gbmtest is read from the object(s) stored in gbmtest.Rdata
load("gbmtest.Rdata")
x <- summary(gbmtest)
# Copy the summary columns into a data frame with descriptive names
x.df <- data.frame(variable = x$var,
                   relative.influence = x$rel.inf)
# Count the variables whose relative influence is exactly zero
x.df.0 <- subset(x.df, relative.influence == 0)
nrow(x.df.0)
## [1] 6
# Keep only variables with a relative influence of at least 1
x.df <- subset(x.df, relative.influence >= 1)
# Order the factor levels by increasing influence so the bars plot in that order
x.df$variable <- factor(x.df$variable,
                        levels = x.df$variable[order(x.df$relative.influence)])
save(x.df, file = "x.df.Rdata")
# Horizontal bar chart of relative influence per variable
ggplot(data = x.df, aes(x = variable, y = relative.influence)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("variable") +
  ylab("relative influence (%)")
# Save the plot built above (ggsave falls back to the last plot)
ggsave("Figure.relative.influence.jpg")
## Saving 7 x 5 in image
## Repeated random train/test splits on the true labels: collects the
## relative influence of every variable plus train/test AUC for each refit.
load("C.Rdata")
df <- C
# Start the clock
ptm <- proc.time()
bootstrap_runs <- 50
# Upper bound on loop passes: data that can never pass the completeness check
# below now raises an error instead of looping forever.
max_attempts <- 10 * bootstrap_runs
attempts <- 0
# One slot per accepted run (filled by index rather than grown by appending,
# which avoids re-copying the stored models and tables on every iteration).
permutedAUC <- rep(NA_real_, bootstrap_runs)
permutedAUC_train <- rep(NA_real_, bootstrap_runs)
best.iter2.list <- rep(NA_integer_, bootstrap_runs)
list_save <- vector("list", bootstrap_runs)     # fitted gbm objects
intrain_list <- vector("list", bootstrap_runs)  # training-row indices per run
rel_inf_runs <- vector("list", bootstrap_runs)  # relative-influence table per run
out <- NULL
i <- 1
while (i <= bootstrap_runs) {
  attempts <- attempts + 1
  if (attempts > max_attempts) {
    stop("Resampling loop gave up after ", max_attempts, " attempts: ",
         "some column has fewer than 2 non-missing values in the training split.",
         call. = FALSE)
  }
  ## create training and test sets
  pan2 <- df
  # The label is passed as a factor so the split is stratified by class.
  # With a numeric 0/1 label createDataPartition bins y by quantiles, which
  # collapses to a single bin and can leave a test set with only one class
  # (colAUC then fails).
  intrain2 <- createDataPartition(y = factor(pan2$Species_no),
                                  p = 0.8,
                                  list = FALSE)
  test2 <- pan2[-intrain2, ]
  training2 <- pan2[intrain2, ]
  # number of non-missing values in each training column
  checksum <- colSums(!is.na(training2))
  n_cols <- ncol(training2)
  # Fit only when every column has at least 2 non-missing training values;
  # otherwise i stays unchanged and a new split is drawn.
  if (all(checksum >= 2)) {
    intrain_list[[i]] <- intrain2
    ## model formula: Species_no ~ all other columns
    label_col <- which(names(training2) == "Species_no")
    names <- colnames(training2[, -c(label_col)])
    y_col <- label_col
    model <- as.formula(paste(colnames(training2)[y_col], "~",
                              paste(names, collapse = "+"),
                              sep = ""))
    # NOTE(review): shrinkage is 0.01 here while the permutation loop earlier
    # in this file uses 0.001 -- confirm the difference is intended.
    gbm2 <- gbm(model,
                data = training2,
                distribution = "bernoulli",
                n.trees = 15000,
                shrinkage = 0.01,
                interaction.depth = 3,
                bag.fraction = 0.50,
                train.fraction = 1,
                n.minobsinnode = 3,
                cv.folds = 10,
                keep.data = TRUE)
    list_save[[i]] <- gbm2
    # get the relative influence info
    # NOTE(review): summary() is called with its defaults, i.e. without
    # n.trees = best.iter2 -- confirm that is the intended tree count.
    x <- summary(gbm2)
    x.df <- data.frame(variable = x$var,
                       relative.influence = x$rel.inf)
    x.df$variable <- factor(x.df$variable,
                            levels = x.df$variable[order(x.df$relative.influence)])
    # tag the rows with the run number and keep them for binding after the loop
    x.df$i <- i
    rel_inf_runs[[i]] <- x.df
    # number of trees selected from the 10-fold cross-validation run above
    best.iter2 <- gbm.perf(gbm2, method = "cv", plot.it = FALSE) # OOB method under predicts
    best.iter2.list[i] <- best.iter2
    ## predictions on the TRAINING set, paired with the numeric label
    output2 <- predict(gbm2, newdata = training2, n.trees = best.iter2, type = "response")
    output2 <- cbind(output2, as.numeric(training2$Species_no))
    # training AUC for this run
    auc2 <- colAUC(output2[, 1], output2[, 2])
    permutedAUC_train[i] <- auc2
    ## predictions on the TEST set, paired with the numeric label
    output.test2 <- predict(gbm2, newdata = test2, n.trees = best.iter2, type = "response")
    output.test2 <- cbind(output.test2, as.numeric(test2$Species_no))
    ## test AUC for this run
    auctest2 <- colAUC(output.test2[, 1], output.test2[, 2])
    permutedAUC[i] <- auctest2
    print(auctest2)
    i <- i + 1
    print(i) # check where we are in the loop
  }
}
# Stack the per-run relative-influence tables into one data frame
out <- do.call(rbind, rel_inf_runs)
## [,1]
## 0 vs. 1 0.7959184
## [1] 2
## [,1]
## 0 vs. 1 0.8666667
## [1] 3
## [,1]
## 0 vs. 1 0.7708333
## [1] 4
## [,1]
## 0 vs. 1 0.5777778
## [1] 5
## [,1]
## 0 vs. 1 0.7083333
## [1] 6
## [,1]
## 0 vs. 1 0.9583333
## [1] 7
## [,1]
## 0 vs. 1 0.875
## [1] 8
## [,1]
## 0 vs. 1 0.8333333
## [1] 9
## [,1]
## 0 vs. 1 0.6734694
## [1] 10
## [,1]
## 0 vs. 1 0.675
## [1] 11
## [,1]
## 0 vs. 1 0.5208333
## [1] 12
## [,1]
## 0 vs. 1 0.8571429
## [1] 13
## [,1]
## 0 vs. 1 0.7111111
## [1] 14
## [,1]
## 0 vs. 1 0.6888889
## [1] 15
## [,1]
## 0 vs. 1 0.75
## [1] 16
## [,1]
## 0 vs. 1 0.8444444
## [1] 17
## [,1]
## 0 vs. 1 0.8444444
## [1] 18
## [,1]
## 0 vs. 1 0.8
## [1] 19
## [,1]
## 0 vs. 1 0.5714286
## [1] 20
## [,1]
## 0 vs. 1 0.625
## [1] 21
## [,1]
## 0 vs. 1 0.6326531
## [1] 22
## [,1]
## 0 vs. 1 0.6530612
## [1] 23
## [,1]
## 0 vs. 1 0.8333333
## [1] 24
## [,1]
## 0 vs. 1 0.6041667
## [1] 25
## [,1]
## 0 vs. 1 0.75
## [1] 26
## [,1]
## 0 vs. 1 0.7708333
## [1] 27
## [,1]
## 0 vs. 1 0.6666667
## [1] 28
## [,1]
## 0 vs. 1 0.6875
## [1] 29
## [,1]
## 0 vs. 1 0.7346939
## [1] 30
## [,1]
## 0 vs. 1 0.6875
## [1] 31
## [,1]
## 0 vs. 1 0.75
## [1] 32
## [,1]
## 0 vs. 1 0.7708333
## [1] 33
## [,1]
## 0 vs. 1 0.8
## [1] 34
## [,1]
## 0 vs. 1 0.7142857
## [1] 35
## [,1]
## 0 vs. 1 0.7755102
## [1] 36
## [,1]
## 0 vs. 1 0.6938776
## [1] 37
## [,1]
## 0 vs. 1 0.7111111
## [1] 38
## [,1]
## 0 vs. 1 0.75
## [1] 39
## [,1]
## 0 vs. 1 1
## [1] 40
## [,1]
## 0 vs. 1 0.6875
## [1] 41
## [,1]
## 0 vs. 1 0.6041667
## [1] 42
## [,1]
## 0 vs. 1 0.95
## [1] 43
## [,1]
## 0 vs. 1 0.8444444
## [1] 44
## [,1]
## 0 vs. 1 0.755102
## [1] 45
## [,1]
## 0 vs. 1 0.8125
## [1] 46
## [,1]
## 0 vs. 1 0.8541667
## [1] 47
## [,1]
## 0 vs. 1 0.8571429
## [1] 48
## [,1]
## 0 vs. 1 0.5918367
## [1] 49
## [,1]
## 0 vs. 1 0.9375
## [1] 50
## [,1]
## 0 vs. 1 0.65
## [1] 51
# Keep the training-row indices of every run on disk
save(intrain_list, file = "intrain_list_presence.Rdata")
# Test AUC: number of missing values, then mean and sd of the rest
sum(is.na(permutedAUC))
## [1] 0
permutedAUC2 <- na.omit(permutedAUC)
mean(permutedAUC2)
## [1] 0.7495669
sd(permutedAUC2)
## [1] 0.1079915
# Training AUC: number of missing values, then mean and sd of the rest
sum(is.na(permutedAUC_train))
## [1] 0
permutedAUC2_train <- na.omit(permutedAUC_train)
mean(permutedAUC2_train)
## [1] 0.9992828
sd(permutedAUC2_train)
## [1] 0.001489297
# Keep the tree counts collected in best.iter2.list on disk
save(best.iter2.list, file = "best.iter2.list.binomial.rel.inf.Rdata")
# Stop the clock: time elapsed since ptm, divided by 60
(proc.time() - ptm) / 60
## user system elapsed
## 2.543850 0.118100 8.913083
# Mean relative influence per variable across runs, keeping means above 1
out_sum <- out %>%
  group_by(variable) %>%
  summarize(mean_influence = mean(relative.influence)) %>%
  filter(mean_influence > 1)
# Rows of `out` for the variables whose mean influence is greater than 1%
out_high <- out[out$variable %in% out_sum$variable, ]
save(out_high, file = "out_high.Rdata")
load("out_high.Rdata")
# Horizontal boxplot of each retained variable's influence across runs
plot <- ggplot(data = out_high, aes(x = variable, y = relative.influence)) +
  geom_boxplot() +
  coord_flip() +
  xlab("variable") +
  ylab("relative influence (%)")
ggsave(filename = "Figure.relative.influence.boxplot.jpg", plot = plot)
## Saving 7 x 5 in image
# Keep the full relative-influence table on disk
save(out, file = "rel.inf.presence.Rdata")
# Keep the fitted models on disk, then read them back
save(list_save, file = "list_save_presence.Rdata")
load("list_save_presence.Rdata")